# Import the "jake_gyllenhaal" dataset and load it into `filmes`.
# import_data() and read_imported_data() are helpers defined outside this section.
import_data("jake_gyllenhaal")
filmes <- read_imported_data()
# Print a compact, column-by-column overview of the loaded data.
glimpse(filmes)
Observations: 20
Variables: 5
$ avaliacao  <int> 92, 68, 73, 52, 73, 59, 82, 85, 92, 49, 35, 64, 47, 90, 87, 61, 62, 44, ...
$ filme      <chr> "Stronger", "Life", "Nocturnal Animals", "Demolition", "Everest", "South...
$ papel      <chr> "Jeff Bauman", "David Jordan", "Tony HastingsEdward Sheffield", "Davis M...
$ bilheteria <dbl> 4.2, 30.2, 10.7, 1.7, 46.6, 42.4, 61.0, 39.1, 54.7, 33.3, 90.8, 28.6, 9....
$ ano        <int> 2017, 2017, 2016, 2016, 2015, 2015, 2013, 2012, 2011, 2010, 2010, 2009, ...

Data Overview

Bilheteria

# Scatter plot of box office (bilheteria) against year (ano), one point per row.
ggplot(data = filmes, mapping = aes(x = ano, y = bilheteria)) +
    geom_point(colour = paleta[1], size = 4)

# Distribution of box office: histogram with bins 10 units wide whose edges
# are aligned at 0, plus a rug marking every individual observation.
ggplot(data = filmes, mapping = aes(x = bilheteria)) +
    geom_histogram(
        binwidth = 10,
        boundary = 0,
        colour = "black",
        fill = "grey"
    ) +
    geom_rug(size = .5) +
    scale_x_continuous(breaks = seq(from = 0, to = 210, by = 10))

# Q-Q plot of box office over all films (stat_qq() compares against the
# normal distribution by default).
# The former group_by(filme) was dropped: ggplot2 does not split layers by
# dplyr groups, so it never changed the plot and only suggested a per-film
# Q-Q that was not being drawn.
filmes %>%
    ggplot(aes(sample = bilheteria)) +
    stat_qq()

# Jittered one-dimensional view of box office with an interactive tooltip
# showing the film name and its value.
p <- filmes %>%
    ggplot(aes(x = "",
               y = bilheteria,
               label = filme,
               # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip
               text = paste("Filme:",filme,
                            "\nBilheteria:",
                            bilheteria,"m"))) +
    geom_jitter(width = .05, alpha = .3, size = 3) +
    labs(x = "")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(p, tooltip = "text") %>%
    layout(autosize = FALSE)

Avaliação

# Scatter plot of rating (avaliacao) against year (ano); the y axis is pinned
# to the 0-100 range.
ggplot(data = filmes, mapping = aes(x = ano, y = avaliacao)) +
    geom_point(colour = paleta[1], size = 4) +
    scale_y_continuous(limits = c(0, 100))

# Distribution of ratings: histogram with bins 10 units wide whose edges are
# aligned at 0, plus a rug marking every individual observation.
ggplot(data = filmes, mapping = aes(x = avaliacao)) +
    geom_histogram(
        binwidth = 10,
        boundary = 0,
        colour = "black",
        fill = paleta[3]
    ) +
    geom_rug(size = .5)

# Q-Q plot of ratings over all films (stat_qq() compares against the normal
# distribution by default).
# The former group_by(filme) was dropped: ggplot2 does not split layers by
# dplyr groups, so it never changed the plot and only suggested a per-film
# Q-Q that was not being drawn.
filmes %>%
    ggplot(aes(sample = avaliacao)) +
    stat_qq()

# Jittered one-dimensional view of ratings with an interactive tooltip
# showing the film name and its rating.
p <- filmes %>%
    ggplot(aes(x = "",
               y = avaliacao,
               # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip
               text = paste(
                    "Filme:",filme,
                    "\nAvaliação:",avaliacao))) +
    geom_jitter(width = .05, alpha = .3, size = 3) +
    labs(x = "")
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(p, tooltip = "text") %>%
    layout(autosize = FALSE)

Agrupamento hierárquico

Uma dimensão

# Hierarchical clustering of the films on rating alone.
# Film names are moved to the row names so that dist()/hclust() carry them
# through as the labels shown in the dendrogram.
# An earlier mutate() that built an unused `nome` column was removed: the
# select(avaliacao) below discarded it before it could be used.
agrupamento_h <- filmes %>%
    as.data.frame() %>%
    column_to_rownames("filme") %>%
    select(avaliacao) %>%
    # documented spelling of the method, matching the two-dimensional clustering
    dist(method = "euclidean") %>%
    hclust(method = "ward.D")
ggdendrogram(agrupamento_h, rotate = TRUE, size = 2, theme_dendro = FALSE) +
    labs(y = "Dissimilaridade", x = "", title = "Dendrograma")

# Cut a hierarchical clustering into one or more numbers of groups and return
# the assignments in long format.
#
# Arguments:
#   agrupamento  clustering tree accepted by cutree() (e.g. an hclust result).
#   num_grupos   number(s) of groups to cut the tree into; a single value or
#                a vector such as 1:6.
#
# Returns a data.frame with one row per (observation, number of groups) pair:
#   label  observation label taken from the tree (row index as text when the
#          tree carries no labels),
#   k      number of groups for that cut, as character,
#   grupo  group id assigned to the observation in that cut, as character.
# Rows are ordered cut by cut, in the order given in `num_grupos`.
get_grupos <- function(agrupamento, num_grupos) {
    cortes <- cutree(agrupamento, k = num_grupos)
    # cutree() returns a plain vector when a single k is requested; promote it
    # to a one-column matrix so the `k` column holds the requested number of
    # groups instead of a placeholder name.
    if (!is.matrix(cortes)) {
        cortes <- matrix(
            cortes,
            ncol = 1L,
            dimnames = list(names(cortes), as.character(num_grupos))
        )
    }
    rotulos <- rownames(cortes)
    if (is.null(rotulos)) {
        rotulos <- as.character(seq_len(nrow(cortes)))
    }
    # as.character() on the matrix walks it column by column, which lines up
    # with the rep() patterns used for `label` and `k`.
    data.frame(
        label = rep(rotulos, times = ncol(cortes)),
        k = rep(colnames(cortes), each = nrow(cortes)),
        grupo = as.character(cortes),
        stringsAsFactors = FALSE
    )
}
# Group assignments for every cut from 1 to 6 groups, joined back to the film
# data through the film name.
atribuicoes <- agrupamento_h %>%
    get_grupos(num_grupos = 1:6) %>%
    left_join(filmes, by = c("label" = "filme"))
# One panel per number of groups; points are ratings coloured by assigned group.
ggplot(atribuicoes, aes(x = "Filmes", y = avaliacao, colour = grupo)) +
    geom_jitter(width = .02, height = 0, size = 1.6, alpha = .6) +
    facet_wrap(~ paste(k, " grupos")) +
    scale_color_brewer(palette = "Dark2")

# Number of groups selected for the detailed view below.
k_escolhido <- 3
# Films ordered by rating and coloured by their group in the chosen cut,
# with an interactive tooltip per film.
p <- atribuicoes %>%
    filter(k == k_escolhido) %>%
    ggplot(aes(x = reorder(label, avaliacao),
               y = avaliacao,
               colour = grupo,
               # tooltip text for ggplotly(); `label` is used directly because
               # paste() only needs the film name, not the reordered factor
               text = paste(
                    "Filme:", label,
                    "\nAvaliação:", avaliacao,
                    "\nGrupo:", grupo))) +
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) +
    facet_wrap(~ paste(k, " grupos")) +
    scale_color_brewer(palette = "Dark2") +
    labs(x = "", y = "Avaliação RT") +
    coord_flip()
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(p, tooltip = "text") %>%
    layout(autosize = FALSE)

Com duas dimensões

# Hierarchical clustering of the films on rating and log10 box office.
# Both variables are standardised with scale() before computing distances.
agrupamento_h_2d <- filmes %>%
   mutate(bilheteria = log10(bilheteria)) %>%
   # the function is passed directly; the funs() wrapper is deprecated
   mutate_at(vars("avaliacao", "bilheteria"), scale) %>%
   # convert to a plain data frame first, as the one-dimensional clustering
   # does: setting row names on a tibble triggers a deprecation warning
   as.data.frame() %>%
   column_to_rownames("filme") %>%
   select("avaliacao", "bilheteria") %>%
   dist(method = "euclidean") %>%
   hclust(method = "ward.D")
Setting row names on a tibble is deprecated.
# Rotated dendrogram of the two-dimensional clustering.
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggdendrogram(agrupamento_h_2d, rotate = TRUE, theme_dendro = FALSE)

# Copy of the film data with box office replaced by its log10.
filmes2 <- mutate(filmes, bilheteria = log10(bilheteria))
# Plot the clustering solutions for 1 to 6 groups with the project helper
# plota_hclusts_2d() (defined outside this section).
# NOTE(review): bilheteria in filmes2 is already log10-transformed and
# scale_y_log10() is added on top; if the helper maps this column directly to
# the y axis, the axis ends up log-transformed twice. Confirm this is intended.
plota_hclusts_2d(
    agrupamento_h_2d,
    filmes2,
    c("avaliacao", "bilheteria"),
    linkage_method = "ward.D",
    ks = 1:6,
    palette = "Dark2"
) +
    scale_y_log10()

# Group assignments from the two-dimensional clustering, restricted to the
# 3-group cut and joined back to the film data through the film name.
atribuicoes <- get_grupos(agrupamento_h_2d, num_grupos = 1:6) %>%
    filter(k == 3) %>%
    mutate(filme = label) %>%
    left_join(filmes, by = "filme")
# Rating vs. box office (log10 y axis), coloured by group, with an
# interactive tooltip per film.
p <- atribuicoes %>%
    ggplot(aes(x = avaliacao,
               y = bilheteria,
               colour = grupo,
               # `text` is not drawn by ggplot2; ggplotly() reads it for the tooltip
               text = paste(
                    "Filme:", filme,
                    "\nBilheteria:", bilheteria,"m\n",
                    "Avaliação:", avaliacao))) +
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) +
    facet_wrap(~ paste(k, " grupos")) +
    scale_color_brewer(palette = "Dark2") +
    scale_y_log10()
# FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
ggplotly(p, tooltip = "text") %>%
    layout(autosize = FALSE)
LS0tCnRpdGxlOiAiVGlwb3MgZGUgZmlsbWUgZGUgSmFrZSBHeWxsZW5oYWFsIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZ2dkZW5kcm8pCgpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9saWIuUiIpKQpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9wbG90YV9zb2x1Y29lc19oY2x1c3QuUiIpKQoKdGhlbWVfc2V0KHRoZW1lX3JlcG9ydCgpKQoKa25pdHI6Om9wdHNfY2h1bmskc2V0KHRpZHkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDYsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0ID0gNSwKICAgICAgICAgICAgICAgICAgICAgIGVjaG8gPSBUUlVFKQpwYWxldGEgPSBjKCIjNDA0RTREIiwKICAgICAgICAgICAiIzkyRENFNSIsCiAgICAgICAgICAgIiM5MzhCQTEiLAogICAgICAgICAgICIjMkQzMTQyIiwKICAgICAgICAgICAiI0Y0NzQzQiIpCmBgYAoKYGBge3IgcmVhZCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KaW1wb3J0X2RhdGEoImpha2VfZ3lsbGVuaGFhbCIpIApmaWxtZXMgPC0gcmVhZF9pbXBvcnRlZF9kYXRhKCkKZmlsbWVzICU+JSAKICAgIGdsaW1wc2UoKQpgYGAKCgojIyBEYXRhIE92ZXJ2aWV3CgojIyMgQmlsaGV0ZXJpYQoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBiaWxoZXRlcmlhKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAKYGBgCgoKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYmlsaGV0ZXJpYSkpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgwLDIxMCwxMCkpCmBgYAoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBncm91cF9ieShmaWxtZSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1iaWxoZXRlcmlhKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KcCA8LSBmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gIiIsCiAgICAgICAgICAgICAgIHkgPSBiaWxoZXRlcmlhLAogICAgICAgICAgICAgICBsYWJlbCA9IGZpbG1lLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoIkZpbG1lOiIsZmlsbWUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAiXG5CaWxoZXRlcmlh
OiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBiaWxoZXRlcmlhLCJtIikpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwLCB0b29sdGlwPSJ0ZXh0IikgJT4lIAogICAgbGF5b3V0KGF1dG9zaXplID0gRikKYGBgCgojIyMgQXZhbGlhw6fDo28KCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYW5vLCB5ID0gYXZhbGlhY2FvKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoMCwgMTAwKSkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdncGxvdChhZXMoeCA9IGF2YWxpYWNhbykpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9IHBhbGV0YVszXSwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSAKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdyb3VwX2J5KGZpbG1lKSAlPiUKICAgIGdncGxvdChhZXMoc2FtcGxlPWF2YWxpYWNhbykpICsgCiAgICBzdGF0X3FxKCkgCmBgYAoKYGBge3J9CnAgPC0gZmlsbWVzICU+JSAKICAgIGdncGxvdChhZXMoeCA9ICIiLAogICAgICAgICAgICAgICB5ID0gYXZhbGlhY2FvLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoCiAgICAgICAgICAgICAgICAgICAgIkZpbG1lOiIsZmlsbWUsCiAgICAgICAgICAgICAgICAgICAgIlxuQXZhbGlhw6fDo286IixhdmFsaWFjYW8pKSkgKyAKICAgIGdlb21faml0dGVyKHdpZHRoID0gLjA1LCBhbHBoYSA9IC4zLCBzaXplID0gMykgKyAKICAgIGxhYnMoeCA9ICIiKQoKZ2dwbG90bHkocCwgdG9vbHRpcCA9ICJ0ZXh0IikgJT4lIAogICAgbGF5b3V0KGF1dG9zaXplID0gRikKCmBgYAoKIyMgQWdydXBhbWVudG8gaGllcsOhcnF1aWNvCgojIyMgVW1hIGRpbWVuc8OjbwoKYGBge3J9CmFncnVwYW1lbnRvX2ggPSBmaWxtZXMgJT4lIAogICAgbXV0YXRlKG5vbWUgPSBwYXN0ZTAoZmlsbWUsICIgKGF2PSIsIGF2YWxpYWNhbywgIikiKSkgJT4lIAogICAgYXMuZGF0YS5mcmFtZSgpICU+JSAKICAgIGNvbHVtbl90b19yb3duYW1lcygiZmlsbWUiKSAlPiUgCiAgICBzZWxlY3QoYXZhbGlhY2FvKSAlPiUKICAgIGRpc3QobWV0aG9kID0gImV1Y2xpZGlhbiIpICU+JSAKICAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikKCmdnZGVuZHJvZ3JhbShhZ3J1cGFtZW50b19oLCByb3RhdGUgPSBULCBzaXplID0gMiwgdGhlbWVfZGVuZHJvID0gRikgKyAKICAgIGxhYnMoeSA9ICJEaXNzaW1pbGFyaWRhZGUiLCB4ID0gIiIsIHRpdGxlID0gIkRlbmRyb2dyYW1hIikKYGBgCgpgYGB7cn0KZ2V0X2dydXBvcyA8LSBmdW5jdGlvbihhZ3J1cGFtZW50bywgbnVtX2dydXBvcyl7CiAgICBhZ3J1cGFtZW50byAlPiUg
CiAgICAgICAgY3V0cmVlKG51bV9ncnVwb3MpICU+JSAKICAgICAgICBhcy5kYXRhLmZyYW1lKCkgJT4lIAogICAgICAgIG11dGF0ZShsYWJlbCA9IHJvd25hbWVzKC4pKSAlPiUgCiAgICAgICAgZ2F0aGVyKGtleSA9ICAiayIsIHZhbHVlID0gImdydXBvIiwgLWxhYmVsKSAlPiUgCiAgICAgICAgbXV0YXRlKGdydXBvID0gYXMuY2hhcmFjdGVyKGdydXBvKSkKfQoKYXRyaWJ1aWNvZXMgPSBnZXRfZ3J1cG9zKGFncnVwYW1lbnRvX2gsIG51bV9ncnVwb3MgPSAxOjYpCgphdHJpYnVpY29lcyA9IGF0cmlidWljb2VzICU+JSAKICAgIGxlZnRfam9pbihmaWxtZXMsIGJ5ID0gYygibGFiZWwiID0gImZpbG1lIikpCgphdHJpYnVpY29lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiRmlsbWVzIiwgeSA9IGF2YWxpYWNhbywgY29sb3VyID0gZ3J1cG8pKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAxLjYsIGFscGhhID0gLjYpICsgCiAgICBmYWNldF93cmFwKH4gcGFzdGUoaywgIiBncnVwb3MiKSkgKyAKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikKCmBgYAoKYGBge3J9CmtfZXNjb2xoaWRvID0gMwoKcCA8LWF0cmlidWljb2VzICU+JSAKICAgIGZpbHRlcihrID09IGtfZXNjb2xoaWRvKSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSByZW9yZGVyKGxhYmVsLCBhdmFsaWFjYW8pLAogICAgICAgICAgICAgICB5ID0gYXZhbGlhY2FvLAogICAgICAgICAgICAgICBjb2xvdXIgPSBncnVwbywKICAgICAgICAgICAgICAgdGV4dCA9IHBhc3RlKAogICAgICAgICAgICAgICAgICAgICJGaWxtZToiLCByZW9yZGVyKGxhYmVsLCBhdmFsaWFjYW8pLAogICAgICAgICAgICAgICAgICAgICJcbkF2YWxpYcOnw6NvOiIsIGF2YWxpYWNhbywKICAgICAgICAgICAgICAgICAgICAiXG5HcnVwbzoiLCBncnVwbykpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDIsIGhlaWdodCA9IDAsIHNpemUgPSAzLCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpICsgCiAgICBsYWJzKHggPSAiIiwgeSA9ICJBdmFsaWHDp8OjbyBSVCIpICsgCiAgICBjb29yZF9mbGlwKCkKCmdncGxvdGx5KHAsdG9vbHRpcCA9ICJ0ZXh0IikgJT4lCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQoKYGBgCgojIyMgQ29tIGR1YXMgZGltZW5zw7VlcwoKYGBge3J9CmFncnVwYW1lbnRvX2hfMmQgPSBmaWxtZXMgJT4lCiAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpICU+JQogICBtdXRhdGVfYXQodmFycygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwgZnVucyhzY2FsZSkpICU+JQogICBjb2x1bW5fdG9fcm93bmFtZXMoImZpbG1lIikgJT4lCiAgIHNlbGVjdCgiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSAlPiUKICAgZGlzdChtZXRob2QgPSAiZXVjbGlkZWFu
IikgJT4lCiAgIGhjbHVzdChtZXRob2QgPSAid2FyZC5EIikKCmdnZGVuZHJvZ3JhbShhZ3J1cGFtZW50b19oXzJkLCByb3RhdGUgPSBUUlVFLCB0aGVtZV9kZW5kcm8gPSBGKQpgYGAKCmBgYHtyfQpmaWxtZXMyIDwtIGZpbG1lcyAlPiUKICAgIG11dGF0ZShiaWxoZXRlcmlhID0gbG9nMTAoYmlsaGV0ZXJpYSkpCgpwbG90YV9oY2x1c3RzXzJkKGFncnVwYW1lbnRvX2hfMmQsCiAgICAgICAgICAgICAgICBmaWxtZXMyLAogICAgICAgICAgICAgICAgYygiYXZhbGlhY2FvIiwgImJpbGhldGVyaWEiKSwKICAgICAgICAgICAgICAgIGxpbmthZ2VfbWV0aG9kID0gIndhcmQuRCIsIAogICAgICAgICAgICAgICAga3MgPSAxOjYsCiAgICAgICAgICAgICAgICBwYWxldHRlID0gIkRhcmsyIikgKyAKICAgIHNjYWxlX3lfbG9nMTAoKQpgYGAKCmBgYHtyfQphdHJpYnVpY29lcyA9IGdldF9ncnVwb3MoYWdydXBhbWVudG9faF8yZCwgbnVtX2dydXBvcyA9IDE6NikKCmF0cmlidWljb2VzID0gYXRyaWJ1aWNvZXMgJT4lIAogICAgZmlsdGVyKGsgPT0gMykgJT4lCiAgICBtdXRhdGUoZmlsbWUgPSBsYWJlbCkgJT4lIAogICAgbGVmdF9qb2luKGZpbG1lcywgYnkgPSAiZmlsbWUiKQoKcCA8LSBhdHJpYnVpY29lcyAlPiUKICAgIGdncGxvdChhZXMoeCA9IGF2YWxpYWNhbywKICAgICAgICAgICAgICAgeSA9IGJpbGhldGVyaWEsCiAgICAgICAgICAgICAgIGNvbG91ciA9IGdydXBvLAogICAgICAgICAgICAgICB0ZXh0ID0gcGFzdGUoCiAgICAgICAgICAgICAgICAgICAgIkZpbG1lOiIsIGZpbG1lLAogICAgICAgICAgICAgICAgICAgICJcbkJpbGhldGVyaWE6IiwgYmlsaGV0ZXJpYSwibVxuIiwKICAgICAgICAgICAgICAgICAgICAiQXZhbGlhw6fDo286IiwgYXZhbGlhY2FvKSkpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wMiwgaGVpZ2h0ID0gMCwgc2l6ZSA9IDMsIGFscGhhID0gLjYpICsgCiAgICBmYWNldF93cmFwKH4gcGFzdGUoaywgIiBncnVwb3MiKSkgKyAKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikgKwogICAgc2NhbGVfeV9sb2cxMCgpCgpnZ3Bsb3RseShwLCB0b29sdGlwID0gInRleHQiKSAlPiUKICAgIGxheW91dChhdXRvc2l6ZSA9IEYpCmBgYAo=